9a0ab8336353d436ded86d6e19954efa1d43a223,experiment/src/main/java/zemberek/corpus/WebDocument.java,WebDocument,fromText,#String#List#,75

Before Change


        // Build the document id from the URL by deleting the scheme text.
        // NOTE(review): the regex is unanchored, so replaceAll removes "http://" and
        // "https://" wherever they occur in url, not only at the beginning.
        String id = url.replaceAll("http://|https://", "");
        // Pull individual fields out of the meta string with Regexps.firstMatch.
        // The last argument (2) is presumably the capture-group index - TODO confirm
        // against the Regexps helper, which is not visible in this excerpt.
        String source = Regexps.firstMatch(sourcePattern, meta, 2);
        String crawlDate = Regexps.firstMatch(crawlDatePattern, meta, 2);
        // For labels, category and title every double quote in the matched text is
        // replaced with a space and the result is trimmed.
        // NOTE(review): if firstMatch can return null, these chained calls would throw
        // a NullPointerException - confirm the helper's contract.
        String labels = Regexps.firstMatch(labelPattern, meta, 2).replace('\"', ' ').trim();
        String category = Regexps.firstMatch(categoryPattern, meta, 2).replace('\"', ' ').trim();
        String title = Regexps.firstMatch(titlePattern, meta, 2).replace('\"', ' ').trim();

After Change


        // Build the document id from the URL by deleting the scheme text.
        // NOTE(review): the regex is unanchored, so replaceAll removes "http://" and
        // "https://" wherever they occur in url, not only at the beginning.
        String id = url.replaceAll("http://|https://", "");
        // Pull individual fields out of the meta string with Regexps.firstMatch.
        // The last argument (2) is presumably the capture-group index - TODO confirm
        // against the Regexps helper, which is not visible in this excerpt.
        String source = Regexps.firstMatch(sourcePattern, meta, 2);
        String crawlDate = Regexps.firstMatch(crawlDatePattern, meta, 2);
        // labels, category and title pass the raw match through getAttribute.
        // getAttribute is not defined in this excerpt; presumably it normalizes the
        // matched text (and may handle a missing match) - verify against its definition.
        String labels =  getAttribute(Regexps.firstMatch(labelPattern, meta, 2));

        String category = getAttribute(Regexps.firstMatch(categoryPattern, meta, 2));
        String title = getAttribute(Regexps.firstMatch(titlePattern, meta, 2));